
******************************************************
******************************************************	
* Figure 8
******************************************************
******************************************************	

* For each simulation seed (300, 500, 700): convert the raw text export to a
* Stata dataset and build a quarter-level file of quality aggregates.
foreach s in 3 5 7 {

	* MATLAB to Stata
	* The two identifier columns are read as double rather than the default
	* float: a float holds integers exactly only up to 2^24 (16,777,216), so
	* larger ids would be rounded and distinct products/firms could be merged
	* by egen group() further down. All other columns keep the default float
	* type so that downstream numerical results are unchanged.
	infile double firm_id float firm_age float firm_cohort ///
		double prod_id float prod_quality float prod_age float prod_cohort ///
		float quarter ///
		using INPUT/data_for_Stata_seed`s'00.csv, clear

	save INTERMEDIATE/data_seed`s'00.dta, replace

	* Total quality & total # products
	* (the data just saved are still in memory, so no reload is needed)
	egen product=group( prod_id  firm_id )
	* Diagnostic only: reports the number of distinct values.
	* NOTE(review): distinct is not an official Stata command; presumably the
	* user-written SSC package is installed -- confirm.
	distinct product prod_id

	gen number_products=1
	gen prod_quality5=prod_quality^5
	gen log_q=log(prod_quality)

	* One row per quarter: sums of counts/qualities and the mean of log quality
	collapse (sum) number_products prod_quality prod_quality5 log_q1=log_q (mean) log_q prod_quality5v=prod_quality5, by(quarter)

	* Q is the fifth root of the per-quarter sum of prod_quality^5
	gen Q=prod_quality5^(1/5)
	gen log_Q=log(Q)

	* Only these aggregates are carried forward
	keep Q log_q log_Q quarter
	save INTERMEDIATE/Aggregates_seed`s'00.dta, replace

}


* Attach the quarter-level aggregates to each seed's product panel, tag the
* rows with their seed, and park the result in a temporary file.
foreach s in 3 5 7 {
	use INTERMEDIATE/data_seed`s'00.dta, clear
	merge m:1 quarter using INTERMEDIATE/Aggregates_seed`s'00.dta, nogenerate
	gen seed=`s'00
	tempfile x`s'
	save `x`s'', replace
}

* Stack the three seeds in the order 300, 500, 700
use `x3', clear
foreach s in 5 7 {
	append using `x`s''
}

* Identifiers that are unique across seeds:
*   product = (prod_id, firm_id, seed)
*   firm    = (firm_id, seed)
*   fe      = (quarter, seed), used later as the absorbed fixed effect
egen product=group( prod_id firm_id seed )
egen firm=group(firm_id seed)
egen fe=group(quarter seed)
save INTERMEDIATE/data_pooled.dta, replace










******************************************************
* Firm Size Distribution (sales deciles)
******************************************************

use INTERMEDIATE/data_pooled.dta, clear

* ------------------------------------------------------------------
* Product-level timing variables
* ------------------------------------------------------------------
* First and last quarter in which each product is observed
bysort product: egen q_first = min(quarter)
bysort product: egen q_last  = max(quarter)

gen cohort   = q_first
gen exitTime = q_last
gen age      = quarter - cohort
bysort product: egen maxAge   = max(age)
bysort product: egen totalobs = count(quarter)

* Left censored: first seen in quarter 1.
* Right censored: last seen in the final quarter of the pooled data.
gen lcensored = (q_first==1)
qui su q_last
gen rcensored = (q_last==`r(max)')

* Type of observation (the four conditions are mutually exclusive;
* anything matching none of them stays "NA")
gen type = cond(age==0 & age!=maxAge,  "entry",      ///
           cond(age>0  & age<maxAge,   "continues",  ///
           cond(age==maxAge & age!=0,  "exit",       ///
           cond(age==maxAge & age==0,  "entry/exit", "NA"))))

gen censored = cond(lcensored==1 & rcensored==0, "left censored",                  ///
               cond(rcensored==1 & lcensored==0, "right censored",                 ///
               cond(lcensored==1 & rcensored==1, "left censored & right censored", ///
               cond(lcensored==0 & rcensored==0, "not censored", "NA"))))
tab type censored, m

* "Complete" = one observation for every quarter between first and last
gen flag_longitudinal = cond(totalobs==q_last-q_first+1, "Complete",   ///
                        cond(totalobs<q_last-q_first+1,  "Incomplete", "NA"))

gen Nentry = inlist(type, "entry", "entry/exit")
gen Nexit  = inlist(type, "exit",  "entry/exit")

gen N_it    = 1
gen size_it = prod_quality^5

* ------------------------------------------------------------------
* Collapse to one row per firm x quarter (fe and seed are carried along)
* ------------------------------------------------------------------
collapse (sum) N_it Nentry Nexit size_it (mean) log_q log_Q , by(firm seed fe quarter)

* ------------------------------------------------------------------
* Firm-level timing variables (same construction as for products)
* ------------------------------------------------------------------
bysort firm: egen q_first = min(quarter)
bysort firm: egen q_last  = max(quarter)

gen cohort   = q_first
gen exitTime = q_last
gen age      = quarter - cohort
bysort firm: egen maxAge   = max(age)
bysort firm: egen totalobs = count(quarter)

gen lcensored = (q_first==1)
qui su q_last
gen rcensored = (q_last==`r(max)')

gen type = cond(age==0 & age!=maxAge,  "entry",      ///
           cond(age>0  & age<maxAge,   "continues",  ///
           cond(age==maxAge & age!=0,  "exit",       ///
           cond(age==maxAge & age==0,  "entry/exit", "NA"))))

gen censored = cond(lcensored==1 & rcensored==0, "left censored",                  ///
               cond(rcensored==1 & lcensored==0, "right censored",                 ///
               cond(lcensored==1 & rcensored==1, "left censored & right censored", ///
               cond(lcensored==0 & rcensored==0, "not censored", "NA"))))
tab type censored, m

gen flag_longitudinal = cond(totalobs==q_last-q_first+1, "Complete",   ///
                        cond(totalobs<q_last-q_first+1,  "Incomplete", "NA"))

* ------------------------------------------------------------------
* Outcome variables
* ------------------------------------------------------------------
* log_size_it = comp1 + comp2, where comp2 is the log number of products
gen log_size_it  = log(size_it)
gen log_size_itA = log(size_it)-5*log_q+(2-6)*log_Q
gen comp2        = log(N_it)
gen comp1        = log_size_it  - comp2
gen comp1A       = log_size_itA - comp2

* Life-cycle sample flag; observations outside it (or older than 16) get age 17.
* Neither is used by the regression in this section.
gen sample = (inrange(cohort,2,23) & age<=16 & age>0 & maxAge>16 & flag_longitudinal=="Complete")
replace age=17 if age>16 | sample==0

* Cohort dummies and their transformed versions for cohorts 4..23
qui tab cohort, gen(cohort_d)
forvalues k = 4/23 {
	gen cohort_nd`k' = cohort_d`k' - ((`k'-1)*cohort_d3 - (`k'-2)*cohort_d2)
}

* ------------------------------------------------------------------
* Size deciles within quarter x seed
* ------------------------------------------------------------------
* Deciles are computed twice: the bottom and top deciles of the first pass
* are blanked out, and deciles are recomputed on what remains.
* NOTE(review): xtile() is not a built-in egen function; presumably it comes
* from the user-written egenmore package -- confirm it is installed.
bys fe: egen xtile=xtile(log_size_it), nq(10)
gen log_size_it0 = cond(inlist(xtile,1,10), ., log_size_it)
bys fe: egen xtile1=xtile(log_size_it0), nq(10)

* ------------------------------------------------------------------
* One regression per outcome; keep the decile coefficients
* ------------------------------------------------------------------
foreach depvar in log_size_it comp1 comp2 {

	preserve
	areg  `depvar' i.xtile1 i.cohort, absorb(fe)

	* Column 1 = coefficients, column 2 = diagonal of e(V).
	* The second column is named "se" but holds the VARIANCE; the square
	* root is taken when the confidence bounds are built below.
	matrix est = (e(b)', vecdiag(e(V))')
	matrix colnames est = coef se
	svmat est, names(col)

	keep coef se
	keep if coef != .

	* Row position is used as the decile number: the first 10 rows are kept
	* (i.xtile1 is the first regressor in the areg call above)
	gen size=_n
	keep if size<=10

	gen coef_u = coef + 1.96*sqrt(se)
	gen coef_l = coef - 1.96*sqrt(se)
	save INTERMEDIATE/LifeCycle_firm_`depvar'.dta, replace
	restore
}

* ------------------------------------------------------------------
* Combine the three coefficient files and plot
* ------------------------------------------------------------------
use INTERMEDIATE/LifeCycle_firm_log_size_it.dta, clear
rename coef coef_sales
foreach c in comp1 comp2 {
	merge 1:1 size using INTERMEDIATE/LifeCycle_firm_`c'.dta, nogenerate
	rename coef coef_`c'
}

* Legend: plot 1 = sales, plot 3 (comp2) = "Scope", plot 2 (comp1) = "Appeal"
twoway ///
	(connected  coef_sales size if size<=10, lp(solid) color(black) lwidth(vthick)) ///
	(connected  coef_comp1 size if size<=10, lp(shortdash) color(gray) lwidth(thick)) ///
	(connected  coef_comp2 size if size<=10,  lp(longdash) color(gray) lwidth(medthick)) ///
	, ///
	ytitle("Estimated (log scale)") ///
	xtitle("Size (decile)")  ///
	title("") ///
	xlabel(1(1)10)  ///
	ylabel(0(3)12.6)  ///
	legend( row(1) order(1 "Firm Sales" 3 "Scope" 2 "Appeal")) ///
	graphregion(color(white)) ///
	plotregion(fcolor(white)) ///
	legend(region(lcolor(white))) ///
	note("")
graph export OUTPUT/Figure8a.eps, replace










******************************************************
* Firm Life Cycle (age)
******************************************************

use INTERMEDIATE/data_pooled.dta, clear

* ------------------------------------------------------------------
* Product-level timing variables
* ------------------------------------------------------------------
* First and last quarter in which each product is observed
bysort product: egen q_first = min(quarter)
bysort product: egen q_last  = max(quarter)

gen cohort   = q_first
gen exitTime = q_last
gen age      = quarter - cohort
bysort product: egen maxAge   = max(age)
bysort product: egen totalobs = count(quarter)

* Left censored: first seen in quarter 1.
* Right censored: last seen in the final quarter of the pooled data.
gen lcensored = (q_first==1)
qui su q_last
gen rcensored = (q_last==`r(max)')

* Type of observation (the four conditions are mutually exclusive;
* anything matching none of them stays "NA")
gen type = cond(age==0 & age!=maxAge,  "entry",      ///
           cond(age>0  & age<maxAge,   "continues",  ///
           cond(age==maxAge & age!=0,  "exit",       ///
           cond(age==maxAge & age==0,  "entry/exit", "NA"))))

gen censored = cond(lcensored==1 & rcensored==0, "left censored",                  ///
               cond(rcensored==1 & lcensored==0, "right censored",                 ///
               cond(lcensored==1 & rcensored==1, "left censored & right censored", ///
               cond(lcensored==0 & rcensored==0, "not censored", "NA"))))
tab type censored, m

* "Complete" = one observation for every quarter between first and last
gen flag_longitudinal = cond(totalobs==q_last-q_first+1, "Complete",   ///
                        cond(totalobs<q_last-q_first+1,  "Incomplete", "NA"))

gen Nentry = inlist(type, "entry", "entry/exit")
gen Nexit  = inlist(type, "exit",  "entry/exit")

gen N_it    = 1
gen size_it = prod_quality^5

* ------------------------------------------------------------------
* Collapse to one row per firm x quarter (fe and seed are carried along)
* ------------------------------------------------------------------
collapse (sum) N_it Nentry Nexit size_it (mean) log_q log_Q , by(firm seed fe quarter)

* ------------------------------------------------------------------
* Firm-level timing variables (same construction as for products)
* ------------------------------------------------------------------
bysort firm: egen q_first = min(quarter)
bysort firm: egen q_last  = max(quarter)

gen cohort   = q_first
gen exitTime = q_last
gen age      = quarter - cohort
bysort firm: egen maxAge   = max(age)
bysort firm: egen totalobs = count(quarter)

gen lcensored = (q_first==1)
qui su q_last
gen rcensored = (q_last==`r(max)')

gen type = cond(age==0 & age!=maxAge,  "entry",      ///
           cond(age>0  & age<maxAge,   "continues",  ///
           cond(age==maxAge & age!=0,  "exit",       ///
           cond(age==maxAge & age==0,  "entry/exit", "NA"))))

gen censored = cond(lcensored==1 & rcensored==0, "left censored",                  ///
               cond(rcensored==1 & lcensored==0, "right censored",                 ///
               cond(lcensored==1 & rcensored==1, "left censored & right censored", ///
               cond(lcensored==0 & rcensored==0, "not censored", "NA"))))
tab type censored, m

gen flag_longitudinal = cond(totalobs==q_last-q_first+1, "Complete",   ///
                        cond(totalobs<q_last-q_first+1,  "Incomplete", "NA"))

* ------------------------------------------------------------------
* Outcome variables
* ------------------------------------------------------------------
* log_size_it = comp1 + comp2, where comp2 is the log number of products
gen log_size_it  = log(size_it)
gen log_size_itA = log(size_it)-5*log_q+(2-6)*log_Q
gen comp2        = log(N_it)
gen comp1        = log_size_it  - comp2
gen comp1A       = log_size_itA - comp2

* Estimation sample: cohorts 2..23, ages 1..16, firms observed beyond age 16
* with no gaps. Everything else is lumped into age 17.
gen sample = (inrange(cohort,2,23) & age<=16 & age>0 & maxAge>16 & flag_longitudinal=="Complete")
replace age=17 if age>16 | sample==0

* Cohort dummies and their transformed versions for cohorts 4..23,
* which enter the regression below as cohort_nd*
qui tab cohort, gen(cohort_d)
forvalues k = 4/23 {
	gen cohort_nd`k' = cohort_d`k' - ((`k'-1)*cohort_d3 - (`k'-2)*cohort_d2)
}

* ------------------------------------------------------------------
* One regression per outcome; keep the age coefficients
* ------------------------------------------------------------------
foreach depvar in log_size_it comp1 comp2 {

	preserve
	areg  `depvar' i.age cohort_nd* if sample==1, absorb(fe)

	* Column 1 = coefficients, column 2 = diagonal of e(V).
	* The second column is named "se" but holds the VARIANCE; the square
	* root is taken when the confidence bounds are built below.
	matrix est = (e(b)', vecdiag(e(V))')
	matrix colnames est = coef se
	svmat est, names(col)

	keep coef se
	keep if coef != .

	* Row position is used as age: the first 16 rows are kept
	* (i.age is the first regressor in the areg call above)
	gen age=_n
	keep if age<=16

	gen coef_u = coef + 1.96*sqrt(se)
	gen coef_l = coef - 1.96*sqrt(se)
	save INTERMEDIATE/LifeCycle_firm_`depvar'.dta, replace
	restore
}

* ------------------------------------------------------------------
* Combine the three coefficient files and plot
* ------------------------------------------------------------------
use INTERMEDIATE/LifeCycle_firm_log_size_it.dta, clear
rename coef coef_sales
foreach c in comp1 comp2 {
	merge 1:1 age using INTERMEDIATE/LifeCycle_firm_`c'.dta, nogenerate
	rename coef coef_`c'
}

* Legend: plot 1 = sales, plot 3 (comp2) = "Scope", plot 2 (comp1) = "Appeal"
twoway ///
	(connected  coef_sales age if age<=16, lp(solid) color(black) lwidth(vthick)) ///
	(connected  coef_comp1 age if age<=16, lp(shortdash) color(gray) lwidth(thick)) ///
	(connected  coef_comp2 age if age<=16,  lp(longdash) color(gray) lwidth(medthick)) ///
	, ///
	ytitle("Estimated (log scale)") ///
	xtitle("Age (quarters)")   ///
	title("") ///
	ylabel(0(0.25)1.02)  ///
	xlabel( 4 8 12 16) ///
	legend( row(1) order(1 "Firm Sales" 3 "Scope" 2 "Appeal")) ///
	graphregion(color(white)) ///
	plotregion(fcolor(white)) ///
	legend(region(lcolor(white))) ///
	note("")
graph export OUTPUT/Figure8b.eps, replace
